## November 9th, 2016
## Simulate 1000 draws from 1:10, sampling with replacement.
## The weights (1:10) / sum(1:10) make each value's probability
## proportional to the value itself, so larger values are drawn more often.
set.seed(20161108)
x <- sample(x = 1:10, size = 1000, replace = TRUE,
    prob = (1:10) / sum(1:10))

## First few draws
head(x)
## [1]  8  9  8  9  8 10

## Five-number summary plus the mean
summary(x)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##   1.000   5.000   7.000   6.923   9.000  10.000

## Base R graphics: index plot, box plot and histogram of x
plot(x)
boxplot(x)
hist(x)

## Same index plot with jittered values so overlapping points separate.
## jitter() adds random noise, hence the second seed for reproducibility.
set.seed(201611082)
plot(seq_along(x), jitter(x, 0.5))
* Stata version of the plots.
* Load x.dta, discarding whatever data is currently in memory.
* NOTE(review): x.dta is presumably the R vector x exported to Stata -- confirm.
use "x.dta", clear

* Histogram of x
histogram x

* Default box plot of x
graph box x

* Customised box plot: orange fill for the first box plus axis title,
* title, subtitle and caption.
graph box x, box(1, fcolor(dkorange)) ///
    ytitle(Our X variable) ///
    title(A nicer plot) subtitle(Made by Leonardo) ///
    caption(This plot is closer to being finished)
## Quick plots with ggplot2's qplot()
library('ggplot2')

## Some example data
head(diamonds)
## # A tibble: 6 × 10
##   carat       cut color clarity depth table price     x     y     z
##   <dbl>     <ord> <ord>   <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23     Ideal     E     SI2  61.5    55   326  3.95  3.98  2.43
## 2  0.21   Premium     E     SI1  59.8    61   326  3.89  3.84  2.31
## 3  0.23      Good     E     VS1  56.9    65   327  4.05  4.07  2.31
## 4  0.29   Premium     I     VS2  62.4    58   334  4.20  4.23  2.63
## 5  0.31      Good     J     SI2  63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J    VVS2  62.8    57   336  3.94  3.96  2.48

## Step 1: price against carat
qplot(carat, price, data = diamonds)

## Step 2: same scatter, points coloured by cut
price_by_cut <- qplot(carat, price, data = diamonds, color = cut)
price_by_cut

## Step 3: swap the default colours for the 'PuOr' brewer palette
price_by_cut_puor <- price_by_cut + scale_color_brewer(palette = 'PuOr')
price_by_cut_puor

## Step 4: one panel per diamond color; label_both prints the variable
## name and its value in each panel strip
price_by_cut_puor +
    facet_grid(. ~ color, labeller = label_both)

## Step 5: default strip labels, black-and-white theme, larger base font
price_by_cut_puor +
    facet_grid(. ~ color) +
    theme_bw(base_size = 20)
## Data source: http://wwwdasis.samhsa.gov/dasis2/teds.htm
## Load the saved workspace file; the code below uses the teds2014 object.
load('teds2014.Rdata')

## Count the columns of teds2014 by class
table(sapply(teds2014, class))
##
##  factor numeric
##      60       2

## Names of the numeric columns
names(Filter(is.numeric, teds2014))
## [1] "CASEID"  "DAYWAIT"

## Scatter plot of the two numeric columns
qplot(x = CASEID, y = DAYWAIT, data = teds2014)

## Same scatter coloured by GENDER, then split into one panel per GENDER
wait_by_gender <- qplot(x = CASEID, y = DAYWAIT, color = GENDER,
    data = teds2014)
wait_by_gender
wait_by_gender + facet_grid(. ~ GENDER)
## Product plots for the categorical columns, using the productplots package
library('productplots')

## One variable: GENDER
prodplot(data = teds2014, formula = ~ GENDER)

## Two variables: MARSTAT and GENDER
prodplot(data = teds2014, formula = ~ MARSTAT + GENDER)
## A more polished ggplot2 figure built layer by layer.
## NOTE(review): emp_exons_one_cuts is not created in this script; it must
## already exist in the session.

## Keep only the rows whose Aligner is 'HISAT'
hisat_rows <- subset(emp_exons_one_cuts, Aligner == 'HISAT')

## Power against FDR: point shape encodes StatMethod, colour encodes cluster
g1 <- ggplot(hisat_rows,
        aes(x = FDR, y = Power, shape = StatMethod, color = cluster)) +
    geom_point(size = 3) +
    geom_line() +
    labs(x = 'Observed FDR (in percent)', y = 'Empirical power') +
    theme_linedraw(base_size = 16) +
    scale_color_brewer(palette = 'Set1', name = 'Group') +
    scale_shape_discrete(name = 'Statistical\nmethod')

## Display the figure
g1
* Stata: scatter plot with CASEID on the y axis and NUMSUBS on the x axis
twoway (scatter CASEID NUMSUBS)

* Stata: box plots of CASEID, drawn separately for each value of NUMSUBS
graph box CASEID, by(NUMSUBS)
## Two-way plots of EDUC against GENDER with shinycsv::plot_twoway().
## The package is attached quietly so its startup messages are not shown.
suppressMessages( library('shinycsv') )

## All rows
plot_twoway(teds2014$EDUC, teds2014$GENDER, 'educ', 'gender')

## Only rows where RACE is 'WHITE'
df <- subset(teds2014, RACE == 'WHITE')
plot_twoway(df$EDUC, df$GENDER, 'educ', 'gender')

## Only rows where RACE is 'ASIAN'
df2 <- subset(teds2014, RACE == 'ASIAN')
plot_twoway(df2$EDUC, df2$GENDER, 'educ', 'gender')
## How teds2014 was built.
## Download http://wwwdasis.samhsa.gov/dasis2/teds_pubs/2014/Admissions/teds_a_2014_r.zip
## NOTE(review): the .rda file is expected to provide the teds_a_2014
## object used below -- confirm against the downloaded archive.
load('teds_a_2014.rda')

## Draw a reproducible random subset of 10,000 rows
set.seed(20161109)
row_pool <- seq_len(nrow(teds_a_2014))
kept_rows <- sample(row_pool, 1e4)
teds2014 <- teds_a_2014[kept_rows, ]

## Save the subset for R (.Rdata) and for Stata (.dta)
save(teds2014, file = 'teds2014.Rdata')
rio::export(teds2014, file = 'teds2014.dta')
## Reproducibility: print the R version and the versions of the packages
## attached in this script.
print(R.version.string)
## [1] "R version 3.3.1 Patched (2016-10-18 r71535)"

## Package versions, printed in this order: ggplot2, productplots, shinycsv
invisible(lapply(
    c('ggplot2', 'productplots', 'shinycsv'),
    function(pkg_name) print(packageVersion(pkg_name))
))
## [1] '2.1.0'
## [1] '0.1.1'
## [1] '0.99.7'